{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "m = 100000\n",
    "\n",
    "x = np.random.normal(size=m)\n",
    "X = x.reshape(-1, 1)\n",
    "y = 4. * x + 3. + np.random.normal(0, 3, size=m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import numpy as np\n",
    "# from sklearn.metrics import r2_score\n",
    "\n",
    "# class LinearRegression:\n",
    "    \n",
    "#     def __init__(self):\n",
    "#         \"\"\"初始化Linear Regression模型\"\"\"\n",
    "#         self.coef_ = None\n",
    "#         self.interception_ = None\n",
    "#         self._theta = None\n",
    "        \n",
    "#     def fit_normal(self, X_train, y_train):\n",
    "#         \"\"\"根据训练数据集X_train, y_train训练Linear Regression模型\"\"\"\n",
    "#         assert X_train.shape[0] == y_train.shape[0], \"the size of X_train must be equal to the size of y_train\"\n",
    "        \n",
    "#         X_b = np.hstack([np.ones((len(X_train), 1)), X_train])\n",
    "#         self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)\n",
    "        \n",
    "#         self.interception_ = self._theta[0]\n",
    "#         self.coef_ = self._theta[1:]\n",
    "        \n",
    "#         return self\n",
    "    \n",
    "#     def fit_gd(self, X_train, y_train, eta=0.01, n_iters = 1e4):\n",
    "#         \"\"\"根据训练数据集X_train, y_train，使用梯度下降法训练Linear Regression模型\"\"\"\n",
    "#         assert X_train.shape[0] == y_train.shape[0], \"the size of X_train must be equal to the size of y_train\"\n",
    "        \n",
    "#         def J(theta, X_b, y):\n",
    "#             try:\n",
    "#                 return np.sum((y - X_b.dot(theta))**2) / len(X_b)\n",
    "#             except:\n",
    "#                 return float('inf')\n",
    "            \n",
    "#         def dJ(theta, X_b, y):\n",
    "#             return X_b.T.dot(X_b.dot(theta)-y) * 2. / len(X_b)\n",
    "        \n",
    "#         def gradient_descent(X_b, y, initial_theta, eta, n_iters = 1e4, epsilon=1e-8):\n",
    "#             theta = initial_theta\n",
    "#             i_iter = 0\n",
    "#             while i_iter < n_iters:\n",
    "#                 gradient = dJ(theta, X_b, y)\n",
    "#                 last_theta = theta\n",
    "#                 theta = theta - eta * gradient\n",
    "#                 if (abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):\n",
    "#                     break\n",
    "#                 i_iter += 1\n",
    "#             return theta\n",
    "        \n",
    "#         X_b = np.hstack([np.ones((len(X_train), 1)), X_train])\n",
    "#         initial_theta = np.zeros(X_b.shape[1])\n",
    "#         self._theta = gradient_descent(X_b, y_train, initial_theta, eta)\n",
    "#         self.interception_ = self._theta[0]\n",
    "#         self.coef_ = self._theta[1:]\n",
    "#         return self\n",
    "        \n",
    "#     def predict(self, X_predict):\n",
    "#         \"\"\"给定待预测数据集X_predict，返回表示X_predict的结果向量\"\"\"\n",
    "#         assert self.interception_ is not None and self.coef_ is not None, \"must fit before predict\"\n",
    "#         assert X_predict.shape[1] == len(self.coef_), \"the feature number of X_predict must equal to X_train\"\n",
    "        \n",
    "#         X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])\n",
    "#         return X_b.dot(self._theta)\n",
    "    \n",
    "#     def score(self, X_test, y_test):\n",
    "#         \"\"\"根据测试数据集X_test, y_test确定当前模型的准确度\"\"\"\n",
    "        \n",
    "#         y_predict = self.predict(X_test)\n",
    "#         return r2_score(y_test, y_predict)\n",
    "        \n",
    "#     def __repr__(self):\n",
    "#         return \"LinearRegression()\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def J(theta, X_b, y):\n",
    "    try:\n",
    "        return np.sum((y - X_b.dot(theta))**2) / len(X_b)\n",
    "    except:\n",
    "        return float('inf')\n",
    "\n",
    "def dJ(theta, X_b, y):\n",
    "    return X_b.T.dot(X_b.dot(theta)-y) * 2. / len(X_b)\n",
    "\n",
    "def gradient_descent(X_b, y, initial_theta, eta, n_iters = 1e4, epsilon=1e-8):\n",
    "    theta = initial_theta\n",
    "    i_iter = 0\n",
    "    while i_iter < n_iters:\n",
    "        gradient = dJ(theta, X_b, y)\n",
    "        last_theta = theta\n",
    "        theta = theta - eta * gradient\n",
    "        if (abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):\n",
    "            break\n",
    "        i_iter += 1\n",
    "    return theta\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 2 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "X_b = np.hstack([np.ones((len(X), 1)), X])\n",
    "initial_theta = np.zeros(X_b.shape[1])\n",
    "eta = 0.01\n",
    "theta = gradient_descent(X_b, y, initial_theta, eta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3.00456203, 3.98777265])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "theta"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 随机梯度下降法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def J(theta, X_b, y):\n",
    "    try:\n",
    "        return np.sum((y - X_b.dot(theta))**2) / len(X_b)\n",
    "    except:\n",
    "        return float('inf')\n",
    "\n",
    "def dJ_sgd(theta, X_b_i, y_i):\n",
    "    return X_b_i.T.dot(X_b_i.dot(theta)-y_i) * 2.\n",
    "\n",
    "def sgd(X_b, y, initial_theta, n_iters):\n",
    "    t0 = 5\n",
    "    t1 = 50\n",
    "    \n",
    "    def learning_rate(t):\n",
    "        return t0 / (t + t1)\n",
    "    \n",
    "    theta = initial_theta\n",
    "    i_iter = 0\n",
    "    for i_iter in range (n_iters):\n",
    "        rand_i = np.random.randint(len(X_b))\n",
    "        gradient = dJ_sgd(theta, X_b[rand_i], y[rand_i])\n",
    "        last_theta = theta\n",
    "        theta = theta - learning_rate(i_iter) * gradient\n",
    "        # 不能保证梯度一直是减小的\n",
    "#         if (abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):\n",
    "#             break\n",
    "    return theta\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 471 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "X_b = np.hstack([np.ones((len(X), 1)), X])\n",
    "initial_theta = np.zeros(X_b.shape[1])\n",
    "theta = sgd(X_b, y, initial_theta, n_iters=len(X_b)//3)   # 这里只检查了1/3样本，对于多元线性回归问题不能这样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2.94954458, 3.95898273])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "theta # 结果和上面差不多"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
